#   LibAut
#       outcome regressions, multiple, with missing data imputation
#       Table C6
#   Juraj Medzihorsky
#   2021-08-14

library(Amelia)
library(logistf)
library(arm)
library(parallel)
options(mc.cores=14) # written for a 16-thread linux machine
options(stringsAsFactors=F)
source('helpers.R')

#   sessionInfo()
##  R version 4.0.4 (2021-02-15)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 20.04.2 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.13.so
##
##  locale:
##   [1] LC_CTYPE=C.UTF-8           LC_NUMERIC=C
##   [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C.UTF-8
##   [7] LC_PAPER=C.UTF-8           LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C
##  [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  stats     graphics  grDevices utils     datasets  methods
##  [8] base
##
##  other attached packages:
##  [1] arm_1.11-2     lme4_1.1-26    Matrix_1.3-2   MASS_7.3-53.1
##  [5] logistf_1.24   Amelia_1.8.0   Rcpp_1.0.7     nvimcom_0.9-92
##
##  loaded via a namespace (and not attached):
##   [1] statmod_1.4.35       xfun_0.21            tidyselect_1.1.0
##   [4] purrr_0.3.4          splines_4.0.4        operator.tools_1.6.3
##   [7] lattice_0.20-41      colorspace_2.0-0     vctrs_0.3.6
##  [10] generics_0.1.0       htmltools_0.5.1.1    mgcv_1.8-34
##  [13] base64enc_0.1-3      survival_3.2-10      rlang_0.4.10
##  [16] pillar_1.4.7         nloptr_1.2.2.2       foreign_0.8-81
##  [19] glue_1.4.2           DBI_1.1.1            RColorBrewer_1.1-2
##  [22] jpeg_0.1-9           lifecycle_1.0.0      stringr_1.4.0
##  [25] munsell_0.5.0        gtable_0.3.0         htmlwidgets_1.5.3
##  [28] coda_0.19-4          knitr_1.31           latticeExtra_0.6-29
##  [31] htmlTable_2.2.1      broom_0.7.5          checkmate_2.0.0
##  [34] backports_1.2.1      scales_1.1.1         Hmisc_4.5-0
##  [37] abind_1.4-5          gridExtra_2.3        digest_0.6.27
##  [40] ggplot2_3.3.3        png_0.1-7            stringi_1.5.3
##  [43] formula.tools_1.7.1  dplyr_1.0.4          grid_4.0.4
##  [46] tools_4.0.4          magrittr_2.0.1       tibble_3.0.6
##  [49] Formula_1.2-4        mice_3.13.0          cluster_2.1.1
##  [52] crayon_1.4.1         tidyr_1.1.2          pkgconfig_2.0.3
##  [55] ellipsis_0.3.1       data.table_1.14.0    rstudioapi_0.13
##  [58] assertthat_0.2.1     minqa_1.2.4          R6_2.5.0
##  [61] boot_1.3-27          rpart_4.1-15         nnet_7.3-15
##  [64] nlme_3.1-152         compiler_4.0.4

#------------------------------------------------------------------------------

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)
tabs_dir <- gsub('code$', 'tables', code_dir)

setwd(data_dir)
dat <- readRDS('outcome_simple_20210810.rds')
setwd(code_dir)

dat$reg_fac <- dat$e_regionpol_6C <- as.factor(dat$e_regionpol_6C)
dat <- subset(dat, !(dem_ep_id%in%'HRV_1992_2004'))

#------------------------------------------------------------------------------

dat_old <- dat

#   filter out columns
vars_to_go <- 
    c('epy',
      'year',
      'reg_fac', 
      'lag1_e_migdppcln',  
      'lag1_e_migdpgro',  
      'lag1_logpop', 
      'lag1_v2pepwrsoc',  
      'lag1_v2xeg_eqdr',  
      'lag1_v2xnp_pres',  
      'lag1_v2csprtcpt', 
      'lag1_v2x_polyarchy', 
      'lag1_excl_region_edi',  
      'lag1_e_miinteco',  
      'lag1_e_miinterc') 

dat <- dat[, c(vars_to_go, 'dem_ep_id')]

#------------------------------------------------------------------------------

bins <- c('lag1_e_miinteco', 'lag1_e_miinterc') 
cats <- c('dem_ep_id', 'reg_fac', 'e_regionpol_6C')

set.seed(123)
system.time(
    a <- amelia(dat, m=100, polytime=3,  
                idvars=c('dem_ep_id', 'epy'), 
                ts='year', cs='reg_fac', noms=c(bins))
)


#------------------------------------------------------------------------------
#   fit

#   formulas
fl0 <- fy0 <- epy ~ 1
linchunk1 <- ~ . + 
    reg_fac +
    lag1_e_migdppcln + 
    lag1_e_migdpgro + 
    lag1_logpop +
    lag1_v2pepwrsoc + 
    lag1_v2xeg_eqdr + 
    lag1_v2xnp_pres + 
    lag1_v2csprtcpt +
    lag1_v2x_polyarchy +
    lag1_excl_region_edi + 
    lag1_e_miinteco + 
    lag1_e_miinterc  
linchunk2 <- ~ . + 
    I((1e-1*(year-1990))) + I((1e-1*(year-1990))^2) + I((1e-1*(year-1990))^3) 
fl1 <- update(fy0, linchunk1)
fl2 <- update(fl1, linchunk2)

o1 <- mclapply(a$imputations, function(j) glm(fl1, data=j, family=stats::gaussian('identity')))
o2 <- mclapply(a$imputations, function(j) glm(fl2, data=j, family=stats::gaussian('identity')))

l1 <- mclapply(a$imputations, function(j) glm(fl1, data=j, family=stats::binomial('logit')))
l2 <- mclapply(a$imputations, function(j) glm(fl2, data=j, family=stats::binomial('logit')))

mcont <- logistf.control(maxit=1e4, maxstep=1e4)
list_f1 <- mclapply(a$imputations, function(i) logistf(fl1, data=i, control=mcont))
list_f2 <- mclapply(a$imputations, function(i) logistf(fl2, data=i, control=mcont))

bo1 <- do.call(rbind, lapply(o1, coef))
bo2 <- do.call(rbind, lapply(o2, coef))
bl1 <- do.call(rbind, lapply(l1, coef))
bl2 <- do.call(rbind, lapply(l2, coef))
bf1 <- do.call(rbind, lapply(list_f1, function(i) i$coefficients))
bf2 <- do.call(rbind, lapply(list_f2, function(i) i$coefficients))

eo1 <- do.call(rbind, lapply(o1, se.coef))
eo2 <- do.call(rbind, lapply(o2, se.coef))
el1 <- do.call(rbind, lapply(l1, se.coef))
el2 <- do.call(rbind, lapply(l2, se.coef))
ef1 <- do.call(rbind, lapply(list_f1, function(i) diag(i$var)^0.5))
ef2 <- do.call(rbind, lapply(list_f2, function(i) diag(i$var)^0.5))


cmondude <-
    function(x)
    {
        z <- do.call(cbind, lapply(x, as.vector))
        rownames(z) <- colnames(x[[1]])
        return(z)
    }

do1 <- cmondude(mi.meld(bo1, eo1))
do2 <- cmondude(mi.meld(bo2, eo2))
dl1 <- cmondude(mi.meld(bl1, el1))
dl2 <- cmondude(mi.meld(bl2, el2))
df1 <- cmondude(mi.meld(bf1, ef1))
df2 <- cmondude(mi.meld(bf2, ef2))


getci <-
    function(x, dig=2)
    {
        ci <- data.frame(term=rownames(x),
                         estimate=round(x[,1], dig),
                         lo=round(x[,1]-1.96*x[,2], dig),
                         hi=round(x[,1]+1.96*x[,2], dig))
        rownames(ci) <- rownames(x)
        return(ci)
    }


ci_o1 <- getci(do1)
ci_o2 <- getci(do2)
ci_l1 <- getci(dl1)
ci_l2 <- getci(dl2)
ci_f1 <- getci(df1)
ci_f2 <- getci(df2)


bb <- 
    data.frame(o1=c(ci_o1$estimate, rep(NA,3)),
               o2=c(ci_o2$estimate),
               l1=c(ci_l1$estimate, rep(NA,3)),
               l2=c(ci_l2$estimate),
               f1=c(ci_f1$estimate, rep(NA,3)),
               f2=c(ci_f2$estimate))

bt <- paste(apply(bb, 1, paste, collapse=' & '), '\\\\')

ci <-
    data.frame(
        o1=c(paste0('(', ci_o1$lo, ', ', ci_o1$hi, ')'), rep('',3)),
        o2=  paste0('(', ci_o2$lo, ', ', ci_o2$hi, ')'),
        l1=c(paste0('(', ci_l1$lo, ', ', ci_l1$hi, ')'), rep('',3)),
        l2=  paste0('(', ci_l2$lo, ', ', ci_l2$hi, ')'),
        f1=c(paste0('(', ci_f1$lo, ', ', ci_f1$hi, ')'), rep('',3)),
        f2=  paste0('(', ci_f2$lo, ', ', ci_f2$hi, ')'))

ct <- paste(apply(ci, 1, paste, collapse=' & '), '\\\\')

bt <- gsub('-', '$-$', bt)
ct <- gsub('-', '$-$', ct)
bt <- gsub('NA', '', bt)
ct <- gsub('NA', '', ct)

tab <- character(length=4*length(bt))
for (i in 1:length(bt)) {
    ii <- (i-1)*3 + i
    tab[ii] <- as.character(ci_l2$term)[i]
    tab[ii+1] <- paste0(' & ', bt[i])
    tab[ii+2] <- paste0(' & ', ct[i])
}

setwd(tabs_dir)
writeLines(tab, con='Table_C6.tex')

#   SCRIPT END